package cn.edu.bjtu.classimpl.parser;

import cn.edu.bjtu.classimpl.documententity.Document;
import cn.edu.bjtu.interfaces.document.IDocument;
import cn.edu.bjtu.interfaces.parser.Parser;

/**
 * Parses tab-separated records exported from the database into documents.
 *
 * <p>Created 2017-05-10.
 *
 * @author LIYAO
 */
public class DataBaseWechatParser implements Parser {

    /** Column separator of an exported record. */
    private static final String FIELD_SEPARATOR = "\t";

    /**
     * Custom marker: when the first field starts with this prefix, that field
     * is used as the label. It simplifies telling labeled records apart from
     * unlabeled ones.
     */
    private static final String LABEL_PREFIX = "$";

    /** Label assigned to records whose first field does not carry the marker. */
    private static final String UNKNOWN_LABEL = "unknown";

    /** Index of the content field when the first field carries the label marker. */
    private static final int LABELED_CONTENT_INDEX = 7;

    /** Index of the content field when the first field carries no label marker. */
    private static final int UNLABELED_CONTENT_INDEX = 6;

    /**
     * Converts one exported record into a document.
     *
     * <p>The record is split on tab characters. Fields 1 and 2 are joined with
     * a comma and passed to the document as its first argument (named
     * {@code url} here; NOTE(review): the meaning of those two columns is not
     * visible in this file - confirm against the export schema). If field 0
     * starts with {@code "$"} it becomes the label and field 7 the content;
     * otherwise the label is {@code "unknown"} and field 6 is the content.
     *
     * @param line one tab-separated record, or {@code null}
     * @return the parsed document, or {@code null} when {@code line} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if the record has fewer fields
     *         than the selected content index requires
     */
    @Override
    public IDocument parse(String line) {
        if (line == null) {
            return null;
        }

        // A negative limit keeps trailing empty fields; the default split would
        // drop them and make a record with empty last columns look too short.
        String[] parts = line.split(FIELD_SEPARATOR, -1);

        // split always yields at least one element, so parts[0] is safe.
        boolean labeled = parts[0].startsWith(LABEL_PREFIX);
        int contentIndex = labeled ? LABELED_CONTENT_INDEX : UNLABELED_CONTENT_INDEX;

        // The content index is the largest index read below, so this single
        // check also covers fields 1 and 2. The exception type matches what an
        // unchecked array access would raise, but with a usable message.
        if (parts.length <= contentIndex) {
            throw new ArrayIndexOutOfBoundsException(
                    "Expected at least " + (contentIndex + 1)
                            + " tab-separated fields but found " + parts.length);
        }

        String url = parts[1] + "," + parts[2];
        String label = labeled ? parts[0] : UNKNOWN_LABEL;
        String content = parts[contentIndex];
        return new Document(url, label, content);
    }

}
